import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from itables import show
import os
import glob
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
tqdm.pandas()
# Collect the per-year classified tweet dumps; sorting the paths puts the
# four-digit years in the file names in ascending order.
files = sorted(glob.glob("data/classified/climate_tweets_*.csv"))
print(files)
# Show full cell contents when frames are displayed. ``None`` is the supported
# way to lift the width limit; the former ``-1`` sentinel is deprecated since
# pandas 1.0 and is rejected by later releases.
pd.set_option('display.max_colwidth', None)
['data/classified/climate_tweets_2006.csv', 'data/classified/climate_tweets_2007.csv', 'data/classified/climate_tweets_2008.csv', 'data/classified/climate_tweets_2009.csv', 'data/classified/climate_tweets_2010.csv', 'data/classified/climate_tweets_2011.csv', 'data/classified/climate_tweets_2012.csv', 'data/classified/climate_tweets_2013.csv', 'data/classified/climate_tweets_2014.csv', 'data/classified/climate_tweets_2015.csv', 'data/classified/climate_tweets_2016.csv', 'data/classified/climate_tweets_2017.csv', 'data/classified/climate_tweets_2018.csv', 'data/classified/climate_tweets_2019.csv', 'data/classified/climate_tweets_2020.csv']
<ipython-input-1-31bef66501b7>:11: FutureWarning: Passing a negative integer is deprecated in version 1.0 and will not be supported in future version. Instead, use None to not limit the column width.
pd.set_option('display.max_colwidth', -1)
%%time
# Read every yearly dump (";"-separated, "\n"-terminated rows) lazily and
# stack the frames into one; tqdm reports progress per file.
# NOTE(review): identical rows sharing one tweet_id show up further down when
# this frame is merged with the bot scores, so the dumps appear to contain
# duplicate tweets - confirm and consider de-duplicating before counting.
yearly_frames = (
    pd.read_csv(path, sep=";", engine="c", lineterminator="\n")
    for path in tqdm(files)
)
df = pd.concat(yearly_frames)
CPU times: user 13min 54s, sys: 1min 44s, total: 15min 38s Wall time: 22min 15s
# Total number of tweet rows loaded across all yearly files
print(len(df))
32335076
%%time
# Parse the timestamp column into datetimes, then use it as the row index
# (keeping the column as well) so the frame can be resampled by time.
df["timestamp"] = pd.to_datetime(df["timestamp"])
df.index = df["timestamp"]
CPU times: user 1min 26s, sys: 2.91 s, total: 1min 29s Wall time: 1min 28s
# Number of tweets per calendar day, followed by the days without any tweet
daycounts = df["timestamp"].resample("D").count()
daycounts.loc[daycounts.eq(0)].index
DatetimeIndex(['2006-06-07', '2006-06-08', '2006-06-09', '2006-06-10',
'2006-06-11', '2006-06-12', '2006-06-13', '2006-06-14',
'2006-06-15', '2006-06-16',
...
'2010-03-15', '2010-03-16', '2010-03-17', '2010-03-18',
'2010-03-19', '2010-03-20', '2010-03-21', '2019-08-29',
'2020-02-17', '2020-02-18'],
dtype='datetime64[ns]', name='timestamp', length=230, freq=None)
# List the columns available in the combined tweet frame
df.columns
Index(['username', 'fullname', 'user_id', 'tweet_id', 'tweet_url', 'timestamp',
'timestamp_epochs', 'replies', 'retweets', 'likes', 'is_retweet',
'retweeter_username', 'retweeter_userid', 'retweet_id', 'text', 'html',
'text_with_emoji', 'neg', 'neu', 'pos', 'compound'],
dtype='object')
# Median number of tweets per calendar month over the whole period
df.timestamp.resample("M").count().median()
159389.5
# Monthly tweet volume: all tweets plus one line per sentiment class.
# Classes are cut on the ``compound`` score: above 0.05 is positive, below
# -0.05 is negative, and the closed interval in between is neutral.
ax = df["timestamp"].resample("M").count().plot(
    figsize=(15, 15), title="Climate twitter - number of tweets"
)
df.loc[df["compound"] > 0.05, "timestamp"].resample("M").count().plot(ax=ax, color='g')
df.loc[df["compound"] < -0.05, "timestamp"].resample("M").count().plot(ax=ax, color='r')
df.loc[df["compound"].between(-.05, .05), "timestamp"].resample("M").count().plot(ax=ax)
# Legend entries follow the order in which the lines were drawn above
ax.legend(["All tweets", "Positive tweets", "Negative tweets", "Neutral tweets"])
ax.set_ylabel("# of tweets per month")
plt.show()
# Share of each sentiment class among all climate tweets, month by month.
# Monthly counts first ...
all_tweets_monthly = df["timestamp"].resample("M").count()
pos_tweets = df.loc[df["compound"] > 0.05, "timestamp"].resample("M").count()
neg_tweets = df.loc[df["compound"] < -0.05, "timestamp"].resample("M").count()
neutral_tweets = df.loc[df["compound"].between(-.05, .05), "timestamp"].resample("M").count()
# ... then each class as a percentage of that month's total
pos_tweets_pc = pos_tweets.div(all_tweets_monthly).mul(100)
neg_tweets_pc = neg_tweets.div(all_tweets_monthly).mul(100)
neutral_tweets_pc = neutral_tweets.div(all_tweets_monthly).mul(100)
ax = pos_tweets_pc.plot(
    figsize=(15, 15),
    title="Climate twitter - tweet sentiment as percentage of all climate tweets - by month",
    color="g",
)
neg_tweets_pc.plot(ax=ax, color='r')
neutral_tweets_pc.plot(ax=ax)
ax.legend(["Positive tweets", "Negative tweets", "Neutral tweets"])
ax.set_xlabel("year")
ax.set_ylabel("% of tweets per month")
plt.show()
# Share of each sentiment class among all climate tweets, week by week.
# NOTE: the variable names are shared with the monthly cell; despite its
# name, ``all_tweets_monthly`` holds *weekly* totals after this cell has run.
all_tweets_monthly = df.timestamp.resample("W").count()
pos_tweets = df.timestamp[df["compound"] > 0.05].resample("W").count()
pos_tweets_pc = pos_tweets / all_tweets_monthly * 100
neg_tweets = df.timestamp[df["compound"] < -0.05].resample("W").count()
neg_tweets_pc = neg_tweets / all_tweets_monthly * 100
neutral_tweets = df.timestamp[df["compound"].between(-.05, .05)].resample("W").count()
neutral_tweets_pc = neutral_tweets / all_tweets_monthly * 100
pos_tweets_pc.plot(figsize=(15,15), title="Climate twitter - tweet sentiment as percentage of all climate tweets - by week",color="g")
neg_tweets_pc.plot(color='r')
neutral_tweets_pc.plot()
plt.legend(["Positive tweets", "Negative tweets", "Neutral tweets"])
plt.xlabel("year")
# The data is resampled per week, so label the axis accordingly
plt.ylabel("% of tweets per week")
plt.show()
%%time
def display_filtered_df(df, pattern, n=5000, extracols=None):
    """Show tweets in an interactive table with pattern matches in bold.

    Frames with fewer than ``n`` rows are shown in full; larger frames are
    randomly sampled down to ``n`` rows. Every match of ``pattern`` in the
    ``text_with_emoji`` column is wrapped in ``<b>`` tags, and each table row
    is tinted green, blue or red for a positive, zero or negative
    ``compound`` score.

    Args:
        df: Frame providing ``text_with_emoji``, ``compound`` and every
            column named in ``extracols``. It is not modified.
        pattern: Regular expression to highlight, matched case-insensitively.
        n: Maximum number of rows to display.
        extracols: Optional list of additional column names to show after
            ``compound``.
    """
    # A fresh list per call avoids the shared-mutable-default pitfall
    extracols = [] if extracols is None else list(extracols)

    sample = df if len(df) < n else df.sample(n)

    # A callable replacement is only valid for regex replacement, so request
    # it explicitly instead of relying on the (changing) pandas default.
    highlighted = sample["text_with_emoji"].str.replace(
        pattern,
        lambda match: "<b>" + match.group() + "</b>",
        case=False,
        regex=True,
    )
    # ``assign`` returns a new frame, so neither the caller's frame nor a
    # slice of it is written to (this was the SettingWithCopyWarning source).
    filtered_df = sample.assign(text_with_emoji_formatted=highlighted)

    # Column 2 is ``compound`` in the rendered table, assuming the frame's
    # index is rendered as column 0 (as in the recorded outputs) - it is used
    # both for the initial sort and for the row colouring below.
    show(filtered_df[["text_with_emoji_formatted", "compound"] + extracols],
         order=[[2, 'desc']],
         orderClasses=False,
         createdRow="""function( row, data, dataIndex ) {
      if (data[2] > 0) {
        $(row).css('background-color', 'rgba(0,255,0,.2)');
      } else if (data[2] == 0) {
        $(row).css('background-color', 'rgba(0,0,255,.2)');
      } else {
        $(row).css('background-color', 'rgba(255,0,0,.2)');
      }
    }""", scrollY="800px", scrollCollapse=True, paging=False, columnDefs=[{"width": "120px", "targets": "_all"}])
# Climate-related search terms to highlight; ``.?`` allows an optional
# separator character so that e.g. "climatechange" and "climate change" both match
pattern = r"global.?warming|climate.?chang|sea.?level.?ris|rising.?sea.?level|climate.?crisis|climate.?action|extreme.?weather|biodiversity|IPCC|Paris.?accord"
# Browse a random sample of all tweets with those terms in bold
display_filtered_df(df, pattern)
<timed exec>:6: FutureWarning: The default value of regex will change from True to False in a future version. WARNING:itables.downsample:showing 2500x2 of 5000x2 as nbytes=80000>65536=maxBytes. See https://mwouts.github.io/itables/#downsampling
| text_with_emoji_formatted | compound | |
|---|---|---|
| timestamp |
CPU times: user 18.3 s, sys: 6.72 s, total: 25 s Wall time: 25 s
# Average ``compound`` sentiment score of all tweets in each calendar month
df["compound"].resample("M").mean().plot(figsize=(20,20), title="Mean sentiment per month")
<AxesSubplot:title={'center':'Mean sentiment per month'}, xlabel='timestamp'>
# Monthly number of tweets containing the whole word "hot" (red) versus
# "cold" (blue). The match is case-sensitive; tweets without text are skipped.
df.timestamp[df.text.str.contains(r"\bhot\b", na=False)].resample("M").count().plot(figsize=(15, 15), title="Global hot vs cold tweets", color="r")
df.timestamp[df.text.str.contains(r"\bcold\b", na=False)].resample("M").count().plot(color="b")
# Label the two lines, as the other multi-series plots do
plt.legend(['Tweets containing "hot"', 'Tweets containing "cold"'])
plt.ylabel("# of tweets per month")
plt.show()
print(df.columns)
# Tweets with more than 200,000 likes (at most 500 shown), with author and like count
display_filtered_df(df[df.likes > 2e5], pattern, 500, ["username", "likes"])
Index(['username', 'fullname', 'user_id', 'tweet_id', 'tweet_url', 'timestamp',
'timestamp_epochs', 'replies', 'retweets', 'likes', 'is_retweet',
'retweeter_username', 'retweeter_userid', 'retweet_id', 'text', 'html',
'text_with_emoji', 'neg', 'neu', 'pos', 'compound'],
dtype='object')
<timed exec>:6: FutureWarning: The default value of regex will change from True to False in a future version. <timed exec>:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
| text_with_emoji_formatted | compound | username | likes | |
|---|---|---|---|---|
| timestamp |
# Tweets posted by one specific account, with the current pattern in bold
display_filtered_df(df[df.username == "realDonaldTrump"], pattern)
<timed exec>:6: FutureWarning: The default value of regex will change from True to False in a future version. <timed exec>:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
| text_with_emoji_formatted | compound | |
|---|---|---|
| timestamp |
%%time
# Keyword heuristic for climate-denial vocabulary (case-insensitive).
# NOTE(review): this selects tweets that merely *contain* these words, so
# tweets quoting or mocking denial are presumably included as well.
pattern = r"fake|not real|isn'?t real|doesn'?t exist|hoax|propaganda|conspiracy"
denial_mask = df["text"].str.contains(pattern, case=False, na=False)
deniers = df.loc[denial_mask]
print(len(deniers))
display_filtered_df(deniers, pattern)
<timed exec>:6: FutureWarning: The default value of regex will change from True to False in a future version. WARNING:itables.downsample:showing 2500x2 of 5000x2 as nbytes=80000>65536=maxBytes. See https://mwouts.github.io/itables/#downsampling
942970
| text_with_emoji_formatted | compound | |
|---|---|---|
| timestamp |
CPU times: user 11min 2s, sys: 193 ms, total: 11min 3s Wall time: 11min 3s
# Monthly number of tweets matching the denial keyword pattern
ax = deniers["timestamp"].resample("M").count().plot(
    figsize=(15, 15), title="Climate denial twitter - number of tweets"
)
ax.set_ylabel("# of tweets per month")
plt.show()
# Denial-keyword tweets as a percentage of all climate tweets, per month
all_tweets_monthly = df["timestamp"].resample("M").count()
denier_tweets = deniers["timestamp"].resample("M").count()
denier_tweet_pc = denier_tweets.div(all_tweets_monthly).mul(100)
ax = denier_tweet_pc.plot(
    figsize=(15, 15),
    title="Climate denier twitter - percentage of all climate tweets - by month",
    color="r",
)
ax.set_ylabel("% of tweets per month")
plt.show()
#%%time
# Save datasets of unique users and ids for bot scoring. Takes ~13min
#df[["username", "user_id"]].value_counts().to_csv("data/users.csv")
# Per-user bot scores; the file provides user_id, screen_name, prob_bot and
# n_tweets columns (screen_name / prob_bot are missing for some users)
botscores = pd.read_csv("data/botscores.csv")
botscores
| user_id | screen_name | prob_bot | n_tweets | |
|---|---|---|---|---|
| 0 | 731164510489313280 | GCCThinkActTank | 0.368905 | 63791 |
| 1 | 330509973 | annemariayritys | 0.007271 | 47821 |
| 2 | 176470818 | Vandahmier | 0.159065 | 41108 |
| 3 | 1079911297 | NaN | NaN | 39807 |
| 4 | 2365497828 | ZEROCO2_ | 0.001946 | 31124 |
| ... | ... | ... | ... | ... |
| 313225 | 1305381488 | KyleJDeFreitas | 0.001114 | 14 |
| 313226 | 947635931590287361 | NaN | NaN | 14 |
| 313227 | 3376148665 | blokeinthecrowd | 0.001657 | 14 |
| 313228 | 3270003739 | _GoSpace | 0.029052 | 14 |
| 313229 | 18589806 | StacyLeMelle | 0.000483 | 14 |
313230 rows × 4 columns
%%time
# Attach each author's bot score to their tweets. ``pd.merge`` defaults to an
# inner join, so tweets from users without a row in ``botscores`` are dropped,
# and the result gets a fresh integer index instead of the timestamp index.
merged = pd.merge(df, botscores, on="user_id")
merged
CPU times: user 2min 57s, sys: 7.02 s, total: 3min 4s Wall time: 3min 2s
| username | fullname | user_id | tweet_id | tweet_url | timestamp | timestamp_epochs | replies | retweets | likes | ... | text | html | text_with_emoji | neg | neu | pos | compound | screen_name | prob_bot | n_tweets | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | eversion | Rob Annable | 46673 | 1893063 | /eversion/status/1893063 | 2006-12-31 10:47:25 | 1167562045 | 0 | 0 | 0 | ... | Warmer climate. Wider grain. Easier to cut. Thank you global warming. | <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en">Warmer climate. Wider grain. Easier to cut. Thank you <strong>global warming</strong>.</p> | Warmer climate. Wider grain. Easier to cut. Thank you global warming . | 0.115 | 0.385 | 0.500 | 0.7184 | eversion | 0.004161 | 14 |
| 1 | eversion | Rob Annable | 46673 | 1893063 | /eversion/status/1893063 | 2006-12-31 10:47:25 | 1167562045 | 0 | 0 | 0 | ... | Warmer climate. Wider grain. Easier to cut. Thank you global warming. | <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en">Warmer climate. Wider grain. Easier to cut. Thank you <strong>global warming</strong>.</p> | Warmer climate. Wider grain. Easier to cut. Thank you global warming . | 0.115 | 0.385 | 0.500 | 0.7184 | eversion | 0.004161 | 14 |
| 2 | eversion | Rob Annable | 46673 | 826756380 | /eversion/status/826756380 | 2008-06-04 11:43:26 | 1212579806 | 0 | 0 | 0 | ... | Failing to blog images from Birmingham's Climate Change festival because of broken iPhone camera | <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en">Failing to blog images from Birmingham's <strong>Climate Change</strong> festival because of broken iPhone camera</p> | Failing to blog images from Birmingham's Climate Change festival because of broken iPhone camera | 0.311 | 0.534 | 0.155 | -0.4939 | eversion | 0.004161 | 14 |
| 3 | eversion | Rob Annable | 46673 | 812123666 | /eversion/status/812123666 | 2008-05-15 17:36:29 | 1210872989 | 0 | 0 | 0 | ... | Wondering what I'm going to do with the stall I've been offered at the Birmingham Climate Change Festival | <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en">Wondering what I'm going to do with the stall I've been offered at the Birmingham <strong>Climate Change</strong> Festival</p> | Wondering what I'm going to do with the stall I've been offered at the Birmingham Climate Change Festival | 0.086 | 0.762 | 0.152 | 0.3400 | eversion | 0.004161 | 14 |
| 4 | eversion | Rob Annable | 46673 | 5534149643 | /eversion/status/5534149643 | 2009-11-08 15:53:14 | 1257695594 | 0 | 0 | 0 | ... | Kids on climate change and low energy buildings: "Dad, is that why you've been working so much? Because you've got to save the earth?" | <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en">Kids on <strong>climate change</strong> and low energy buildings: "Dad, is that why you've been working so much? Because you've got to save the earth?"</p> | Kids on climate change and low energy buildings: "Dad, is that why you've been working so much? Because you've got to save the earth?" | 0.073 | 0.730 | 0.197 | 0.5514 | eversion | 0.004161 | 14 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 19663644 | TheHipHopMayor | Likwid Nomana | 1206256844827697152 | 1213523146885779456 | /TheHipHopMayor/status/1213523146885779456 | 2020-01-04 18:10:39 | 1578161439 | 0 | 0 | 0 | ... | We had #20plentyHipHopSessions Ko Tlhageng yesterday at Ventersdorp. #Blacktwitter #ClimateChange #IranWar #hiphop #music #beats #RhymeFestZApic.twitter.com/wXkllYlPSM | <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="in">We had <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/20plentyHipHopSessions?src=hash"><s>#</s><b>20plentyHipHopSessions</b></a> Ko Tlhageng yesterday at Ventersdorp. <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/Blacktwitter?src=hash"><s>#</s><b>Blacktwitter</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/ClimateChange?src=hash"><s>#</s><b><strong>ClimateChange</strong></b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/IranWar?src=hash"><s>#</s><b>IranWar</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/hiphop?src=hash"><s>#</s><b>hiphop</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/music?src=hash"><s>#</s><b>music</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/beats?src=hash"><s>#</s><b>beats</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/RhymeFestZA?src=hash"><s>#</s><b>RhymeFestZA</b></a><a class="twitter-timeline-link u-hidden" data-pre-embedded="true" dir="ltr" href="https://t.co/wXkllYlPSM">pic.twitter.com/wXkllYlPSM</a></p> | We had #20plentyHipHopSessions Ko Tlhageng yesterday at Ventersdorp. 
#Blacktwitter #ClimateChange #IranWar #hiphop #music #beats #RhymeFestZA pic.twitter.com/wXkllYlPSM | 0.000 | 1.000 | 0.000 | 0.0000 | TheHipHopMayor | 0.002180 | 23 |
| 19663645 | TheHipHopMayor | Likwid Nomana | 1206256844827697152 | 1213523113503285249 | /TheHipHopMayor/status/1213523113503285249 | 2020-01-04 18:10:31 | 1578161431 | 0 | 0 | 0 | ... | We had #20plentyHipHopSessions Ko Tlhageng yesterday at Ventersdorp. #Blacktwitter #ClimateChange #IranWar #hiphop #music #beats #RhymeFestZApic.twitter.com/ltDetqUdWW | <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="in">We had <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/20plentyHipHopSessions?src=hash"><s>#</s><b>20plentyHipHopSessions</b></a> Ko Tlhageng yesterday at Ventersdorp. <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/Blacktwitter?src=hash"><s>#</s><b>Blacktwitter</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/ClimateChange?src=hash"><s>#</s><b><strong>ClimateChange</strong></b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/IranWar?src=hash"><s>#</s><b>IranWar</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/hiphop?src=hash"><s>#</s><b>hiphop</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/music?src=hash"><s>#</s><b>music</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/beats?src=hash"><s>#</s><b>beats</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/RhymeFestZA?src=hash"><s>#</s><b>RhymeFestZA</b></a><a class="twitter-timeline-link u-hidden" data-pre-embedded="true" dir="ltr" href="https://t.co/ltDetqUdWW">pic.twitter.com/ltDetqUdWW</a></p> | We had #20plentyHipHopSessions Ko Tlhageng yesterday at Ventersdorp. 
#Blacktwitter #ClimateChange #IranWar #hiphop #music #beats #RhymeFestZA pic.twitter.com/ltDetqUdWW | 0.000 | 1.000 | 0.000 | 0.0000 | TheHipHopMayor | 0.002180 | 23 |
| 19663646 | TheHipHopMayor | Likwid Nomana | 1206256844827697152 | 1213523088027062274 | /TheHipHopMayor/status/1213523088027062274 | 2020-01-04 18:10:25 | 1578161425 | 0 | 0 | 0 | ... | We had #20plentyHipHopSessions Ko Tlhageng yesterday at Ventersdorp. #Blacktwitter #ClimateChange #IranWar #hiphop #music #beats #RhymeFestZApic.twitter.com/MfsPu9687W | <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="in">We had <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/20plentyHipHopSessions?src=hash"><s>#</s><b>20plentyHipHopSessions</b></a> Ko Tlhageng yesterday at Ventersdorp. <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/Blacktwitter?src=hash"><s>#</s><b>Blacktwitter</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/ClimateChange?src=hash"><s>#</s><b><strong>ClimateChange</strong></b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/IranWar?src=hash"><s>#</s><b>IranWar</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/hiphop?src=hash"><s>#</s><b>hiphop</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/music?src=hash"><s>#</s><b>music</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/beats?src=hash"><s>#</s><b>beats</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/RhymeFestZA?src=hash"><s>#</s><b>RhymeFestZA</b></a><a class="twitter-timeline-link u-hidden" data-pre-embedded="true" dir="ltr" href="https://t.co/MfsPu9687W">pic.twitter.com/MfsPu9687W</a></p> | We had #20plentyHipHopSessions Ko Tlhageng yesterday at Ventersdorp. 
#Blacktwitter #ClimateChange #IranWar #hiphop #music #beats #RhymeFestZA pic.twitter.com/MfsPu9687W | 0.000 | 1.000 | 0.000 | 0.0000 | TheHipHopMayor | 0.002180 | 23 |
| 19663647 | TheHipHopMayor | Likwid Nomana | 1206256844827697152 | 1213523070117404672 | /TheHipHopMayor/status/1213523070117404672 | 2020-01-04 18:10:20 | 1578161420 | 0 | 0 | 0 | ... | We had #20plentyHipHopSessions Ko Tlhageng yesterday at Ventersdorp. #Blacktwitter #ClimateChange #IranWar #hiphop #music #beats #RhymeFestZApic.twitter.com/ci341dGBoc | <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="in">We had <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/20plentyHipHopSessions?src=hash"><s>#</s><b>20plentyHipHopSessions</b></a> Ko Tlhageng yesterday at Ventersdorp. <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/Blacktwitter?src=hash"><s>#</s><b>Blacktwitter</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/ClimateChange?src=hash"><s>#</s><b><strong>ClimateChange</strong></b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/IranWar?src=hash"><s>#</s><b>IranWar</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/hiphop?src=hash"><s>#</s><b>hiphop</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/music?src=hash"><s>#</s><b>music</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/beats?src=hash"><s>#</s><b>beats</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/RhymeFestZA?src=hash"><s>#</s><b>RhymeFestZA</b></a><a class="twitter-timeline-link u-hidden" data-pre-embedded="true" dir="ltr" href="https://t.co/ci341dGBoc">pic.twitter.com/ci341dGBoc</a></p> | We had #20plentyHipHopSessions Ko Tlhageng yesterday at Ventersdorp. 
#Blacktwitter #ClimateChange #IranWar #hiphop #music #beats #RhymeFestZA pic.twitter.com/ci341dGBoc | 0.000 | 1.000 | 0.000 | 0.0000 | TheHipHopMayor | 0.002180 | 23 |
| 19663648 | TheHipHopMayor | Likwid Nomana | 1206256844827697152 | 1213522649260929031 | /TheHipHopMayor/status/1213522649260929031 | 2020-01-04 18:08:40 | 1578161320 | 0 | 0 | 0 | ... | We had #20plentyHipHopSessions Ko Tlhageng yesterday at Ventersdorp. #Blacktwitter #ClimateChange #IranWar #hiphop #music #beats #RhymeFestZA | <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="in">We had <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/20plentyHipHopSessions?src=hash"><s>#</s><b>20plentyHipHopSessions</b></a> Ko Tlhageng yesterday at Ventersdorp. <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/Blacktwitter?src=hash"><s>#</s><b>Blacktwitter</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/ClimateChange?src=hash"><s>#</s><b><strong>ClimateChange</strong></b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/IranWar?src=hash"><s>#</s><b>IranWar</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/hiphop?src=hash"><s>#</s><b>hiphop</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/music?src=hash"><s>#</s><b>music</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/beats?src=hash"><s>#</s><b>beats</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/RhymeFestZA?src=hash"><s>#</s><b>RhymeFestZA</b></a></p> | We had #20plentyHipHopSessions Ko Tlhageng yesterday at Ventersdorp. #Blacktwitter #ClimateChange #IranWar #hiphop #music #beats #RhymeFestZA | 0.000 | 1.000 | 0.000 | 0.0000 | TheHipHopMayor | 0.002180 | 23 |
19663649 rows × 24 columns
# Split the scored tweets at a bot probability of 0.5. Rows with a score of
# exactly 0.5 or with a missing score satisfy neither comparison and so end
# up in neither subset.
bot_tweets = merged.loc[merged["prob_bot"].gt(.5)]
human_tweets = merged.loc[merged["prob_bot"].lt(.5)]
# Index both subsets by timestamp again so they can be resampled by time
for subset in (bot_tweets, human_tweets):
    subset.index = subset["timestamp"]
# Monthly volume of bot tweets: all of them plus one line per sentiment class
# (compound above 0.05 positive, below -0.05 negative, in between neutral)
ax = bot_tweets["timestamp"].resample("M").count().plot(
    figsize=(15, 15), title="Climate twitter - number of bot tweets"
)
bot_tweets.loc[bot_tweets["compound"] > 0.05, "timestamp"].resample("M").count().plot(ax=ax, color='g')
bot_tweets.loc[bot_tweets["compound"] < -0.05, "timestamp"].resample("M").count().plot(ax=ax, color='r')
bot_tweets.loc[bot_tweets["compound"].between(-.05, .05), "timestamp"].resample("M").count().plot(ax=ax)
ax.legend(["All tweets", "Positive tweets", "Negative tweets", "Neutral tweets"])
ax.set_ylabel("# of bot tweets per month")
plt.show()
# Share of each sentiment class among bot tweets, month by month
all_tweets_monthly = bot_tweets["timestamp"].resample("M").count()
pos_tweets = bot_tweets.loc[bot_tweets["compound"] > 0.05, "timestamp"].resample("M").count()
neg_tweets = bot_tweets.loc[bot_tweets["compound"] < -0.05, "timestamp"].resample("M").count()
neutral_tweets = bot_tweets.loc[bot_tweets["compound"].between(-.05, .05), "timestamp"].resample("M").count()
# Convert the class counts into percentages of the monthly bot total
pos_tweets_pc = pos_tweets.div(all_tweets_monthly).mul(100)
neg_tweets_pc = neg_tweets.div(all_tweets_monthly).mul(100)
neutral_tweets_pc = neutral_tweets.div(all_tweets_monthly).mul(100)
ax = pos_tweets_pc.plot(
    figsize=(15, 15),
    title="Climate twitter - tweet sentiment as percentage of all bot climate tweets - by month",
    color="g",
)
neg_tweets_pc.plot(ax=ax, color='r')
neutral_tweets_pc.plot(ax=ax)
ax.legend(["Positive tweets", "Negative tweets", "Neutral tweets"])
ax.set_xlabel("year")
ax.set_ylabel("% of bot tweets per month")
plt.show()
# Switch back to the climate search terms for highlighting
pattern = r"global.?warming|climate.?chang|sea.?level.?ris|rising.?sea.?level|climate.?crisis|climate.?action|extreme.?weather|biodiversity|IPCC|Paris.?accord"
# Browse a random sample of the tweets attributed to likely bots
display_filtered_df(bot_tweets, pattern)
<timed exec>:6: FutureWarning: The default value of regex will change from True to False in a future version. WARNING:itables.downsample:showing 2500x2 of 5000x2 as nbytes=80000>65536=maxBytes. See https://mwouts.github.io/itables/#downsampling
| text_with_emoji_formatted | compound | |
|---|---|---|
| timestamp |
# Monthly volume of non-bot tweets: all of them plus one line per sentiment
# class (compound above 0.05 positive, below -0.05 negative, in between neutral)
ax = human_tweets["timestamp"].resample("M").count().plot(
    figsize=(15, 15), title="Climate twitter - number of non-bot tweets"
)
human_tweets.loc[human_tweets["compound"] > 0.05, "timestamp"].resample("M").count().plot(ax=ax, color='g')
human_tweets.loc[human_tweets["compound"] < -0.05, "timestamp"].resample("M").count().plot(ax=ax, color='r')
human_tweets.loc[human_tweets["compound"].between(-.05, .05), "timestamp"].resample("M").count().plot(ax=ax)
ax.legend(["All tweets", "Positive tweets", "Negative tweets", "Neutral tweets"])
ax.set_ylabel("# of non-bot tweets per month")
plt.show()
# Share of each sentiment class among non-bot tweets, month by month
all_tweets_monthly = human_tweets["timestamp"].resample("M").count()
pos_tweets = human_tweets.loc[human_tweets["compound"] > 0.05, "timestamp"].resample("M").count()
neg_tweets = human_tweets.loc[human_tweets["compound"] < -0.05, "timestamp"].resample("M").count()
neutral_tweets = human_tweets.loc[human_tweets["compound"].between(-.05, .05), "timestamp"].resample("M").count()
# Convert the class counts into percentages of the monthly non-bot total
pos_tweets_pc = pos_tweets.div(all_tweets_monthly).mul(100)
neg_tweets_pc = neg_tweets.div(all_tweets_monthly).mul(100)
neutral_tweets_pc = neutral_tweets.div(all_tweets_monthly).mul(100)
ax = pos_tweets_pc.plot(
    figsize=(15, 15),
    title="Climate twitter - tweet sentiment as percentage of all non-bot climate tweets - by month",
    color="g",
)
neg_tweets_pc.plot(ax=ax, color='r')
neutral_tweets_pc.plot(ax=ax)
ax.legend(["Positive tweets", "Negative tweets", "Neutral tweets"])
ax.set_xlabel("year")
ax.set_ylabel("% of non-bot tweets per month")
plt.show()
%%time
# Attach bot scores to the denial-keyword tweets; as an inner join (the
# ``pd.merge`` default) this keeps only tweets whose author has a row in ``botscores``
deniers_merged = pd.merge(deniers, botscores, on="user_id")
deniers_merged
CPU times: user 3.87 s, sys: 59 ms, total: 3.93 s Wall time: 3.92 s
| username | fullname | user_id | tweet_id | tweet_url | timestamp | timestamp_epochs | replies | retweets | likes | ... | text | html | text_with_emoji | neg | neu | pos | compound | screen_name | prob_bot | n_tweets | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | jhayrocas | Jay | 926061 | 543666102 | /jhayrocas/status/543666102 | 2007-12-29 13:00:26 | 1198933226 | 0 | 0 | 0 | ... | who said global warming is a hoax, it's december and i'm sweating in silang! | <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en">who said <strong>global warming</strong> is a hoax, it's december and i'm sweating in silang!</p> | who said global warming is a hoax, it's december and i'm sweating in silang! | 0.150 | 0.750 | 0.100 | -0.2003 | jhayrocas | 0.000852 | 15 |
| 1 | Rubenerd | Ruben Schade 🔰 | 875971 | 520722232 | /Rubenerd/status/520722232 | 2007-12-21 10:39:15 | 1198233555 | 0 | 0 | 0 | ... | @IntoYourHead I heard from Hank that Global Warming is a conspiracy cooked up by polar bears or something to stop artic exploration! | <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en"><a class="twitter-atreply pretty-link js-nav" data-mentioned-user-id="49491971" dir="ltr" href="/intoyourhead"><s>@</s><b>IntoYourHead</b></a> I heard from Hank that <strong>Global Warming</strong> is a conspiracy cooked up by polar bears or something to stop artic exploration!</p> | @IntoYourHead I heard from Hank that Global Warming is a conspiracy cooked up by polar bears or something to stop artic exploration! | 0.215 | 0.657 | 0.128 | -0.5255 | Rubenerd | 0.023427 | 35 |
| 2 | Rubenerd | Ruben Schade 🔰 | 875971 | 520722232 | /Rubenerd/status/520722232 | 2007-12-21 10:39:15 | 1198233555 | 0 | 0 | 0 | ... | @IntoYourHead I heard from Hank that Global Warming is a conspiracy cooked up by polar bears or something to stop artic exploration! | <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en"><a class="twitter-atreply pretty-link js-nav" data-mentioned-user-id="49491971" dir="ltr" href="/intoyourhead"><s>@</s><b>IntoYourHead</b></a> I heard from Hank that <strong>Global Warming</strong> is a conspiracy cooked up by polar bears or something to stop artic exploration!</p> | @IntoYourHead I heard from Hank that Global Warming is a conspiracy cooked up by polar bears or something to stop artic exploration! | 0.215 | 0.657 | 0.128 | -0.5255 | Rubenerd | 0.023427 | 35 |
| 3 | Rubenerd | Ruben Schade 🔰 | 875971 | 800304651 | /Rubenerd/status/800304651 | 2008-04-30 12:16:12 | 1209557772 | 0 | 0 | 0 | ... | @ons I've almost finished my report on the "Oil companies discrediting global warming to save their business" conspiracy theory | <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en"><a class="twitter-atreply pretty-link js-nav" data-mentioned-user-id="219275799" dir="ltr" href="/ONS"><s>@</s><b>ons</b></a> I've almost finished my report on the "Oil companies discrediting <strong>global warming</strong> to save their business" conspiracy theory</p> | @ons I've almost finished my report on the "Oil companies discrediting global warming to save their business" conspiracy theory | 0.140 | 0.661 | 0.198 | 0.1027 | Rubenerd | 0.023427 | 35 |
| 4 | conservatweet | conservatweet | 9335232 | 490757572 | /conservatweet/status/490757572 | 2007-12-11 17:41:10 | 1197394870 | 0 | 0 | 0 | ... | [Hot Air]: Max Mayfield destroys Democrats’ global warming conspiracy: Imperfect storm. http://tinyurl.com/2vlhas | <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en">[Hot Air]: Max Mayfield destroys Democrats’ <strong>global warming</strong> conspiracy: Imperfect storm. http://tinyurl.com/2vlhas</p> | [Hot Air]: Max Mayfield destroys Democrats’ global warming conspiracy: Imperfect storm. http://tinyurl.com/2vlhas | 0.492 | 0.423 | 0.085 | -0.8271 | NaN | NaN | 2265 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 511151 | MisterTimmr1 | Old Timmr | 4879535764 | 1212176671731634176 | /MisterTimmr1/status/1212176671731634176 | 2020-01-01 01:00:14 | 1577840414 | 0 | 0 | 0 | ... | @LindseyGrahamSC also said climate change was real. He's a flake or fake. Take your pick. | <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en"><a class="twitter-atreply pretty-link js-nav" data-mentioned-user-id="432895323" dir="ltr" href="/LindseyGrahamSC"><s>@</s><b>LindseyGrahamSC</b></a> also said <strong>climate change</strong> was real. He's a flake or fake. Take your pick.</p> | @LindseyGrahamSC also said climate change was real. He's a flake or fake. Take your pick. | 0.181 | 0.819 | 0.000 | -0.4767 | MisterTimmr1 | 0.001588 | 28 |
| 511152 | right_populist | TruthTeller | 1209227323079372810 | 1212167901379207169 | /right_populist/status/1212167901379207169 | 2020-01-01 00:25:23 | 1577838323 | 2 | 0 | 1 | ... | It’s funny because he probably just search up 'climate change is a hoax' and looked for an article and shared it with us. Haha | <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en">It’s funny because he probably just search up '<strong>climate change</strong> is a hoax' and looked for an article and shared it with us. Haha</p> | It’s funny because he probably just search up ' climate change is a hoax' and looked for an article and shared it with us. Haha | 0.067 | 0.669 | 0.264 | 0.7351 | RightMinarchist | 0.001528 | 14 |
| 511153 | KarlHaymann | Karlos | 1205040259903287298 | 1212165790964649985 | /KarlHaymann/status/1212165790964649985 | 2020-01-01 00:17:00 | 1577837820 | 0 | 0 | 0 | ... | The conspiracy theory is already doing the rounds amongst RWNJ that greens and climate change activists are starting them to blame Climate change, go figure. pic.twitter.com/YEFTEErKD1 | <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en">The conspiracy theory is already doing the rounds amongst RWNJ that greens and <strong>climate change</strong> activists are starting them to blame <strong>Climate change</strong>, go figure. <a class="twitter-timeline-link u-hidden" data-pre-embedded="true" dir="ltr" href="https://t.co/YEFTEErKD1">pic.twitter.com/YEFTEErKD1</a></p> | The conspiracy theory is already doing the rounds amongst RWNJ that greens and climate change activists are starting them to blame Climate change , go figure. pic.twitter.com/YEFTEErKD1 | 0.188 | 0.812 | 0.000 | -0.7003 | NaN | NaN | 38 |
| 511154 | TheRealGremlich | Michael Nolen | 3332482995 | 1212162390395101185 | /TheRealGremlich/status/1212162390395101185 | 2020-01-01 00:03:29 | 1577837009 | 0 | 0 | 0 | ... | Look at this from 2004. Our country was being in danger back then, not from climate change, but hoaxers. It's not just unending wars, it's this making elitists and globalists money. Trump is dangerous because he found out.\n\n https://amp.theguardian.com/environment/2004/feb/22/usnews.theobserver?__twitter_impression=true … | <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en">Look at this from 2004. Our country was being in danger back then, not from <strong>climate change</strong>, but hoaxers. It's not just unending wars, it's this making elitists and globalists money. Trump is dangerous because he found out.\n\n <a class="twitter-timeline-link" data-expanded-url="https://amp.theguardian.com/environment/2004/feb/22/usnews.theobserver?__twitter_impression=true" dir="ltr" href="https://t.co/CzmXGTG36x" rel="nofollow noopener" target="_blank" title="https://amp.theguardian.com/environment/2004/feb/22/usnews.theobserver?__twitter_impression=true"><span class="tco-ellipsis"></span><span class="invisible">https://</span><span class="js-display-url">amp.theguardian.com/environment/20</span><span class="invisible">04/feb/22/usnews.theobserver?__twitter_impression=true</span><span class="tco-ellipsis"><span class="invisible"> </span>…</span></a></p> | Look at this from 2004. Our country was being in danger back then, not from climate change , but hoaxers. It's not just unending wars, it's this making elitists and globalists money. Trump is dangerous because he found out.\n\n https://amp.theguardian.com/environment/2004/feb/22/usnews.theobserver?__twitter_impression=true … | 0.132 | 0.788 | 0.081 | -0.3536 | NaN | NaN | 28 |
| 511155 | lafendad | Lafendad | 1206865510891237376 | 1212161581741748226 | /lafendad/status/1212161581741748226 | 2020-01-01 00:00:16 | 1577836816 | 1 | 0 | 0 | ... | Or how about an international scientific journal that makes the connection between the 1980s Acid Rain scare and the current Climate Change hoax for you?\n\nhttps://principia-scientific.org/anatomy-of-an-environmental-scare/ … | <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en">Or how about an international scientific journal that makes the connection between the 1980s Acid Rain scare and the current <strong>Climate Change</strong> hoax for you?\n\n<a class="twitter-timeline-link u-hidden" data-expanded-url="https://principia-scientific.org/anatomy-of-an-environmental-scare/" dir="ltr" href="https://t.co/xMACJIKqsa" rel="nofollow noopener" target="_blank" title="https://principia-scientific.org/anatomy-of-an-environmental-scare/"><span class="tco-ellipsis"></span><span class="invisible">https://</span><span class="js-display-url">principia-scientific.org/anatomy-of-an-</span><span class="invisible">environmental-scare/</span><span class="tco-ellipsis"><span class="invisible"> </span>…</span></a></p> | Or how about an international scientific journal that makes the connection between the 1980s Acid Rain scare and the current Climate Change hoax for you?\n\n https://principia-scientific.org/anatomy-of-an-environmental-scare/ … | 0.175 | 0.825 | 0.000 | -0.6486 | NaN | NaN | 15 |
511156 rows × 24 columns
# Index the denier tweets by their own timestamp so the time-based
# resampling below can bucket them by calendar month.
deniers_merged.index = deniers_merged["timestamp"]

# Monthly count of every climate tweet; this is the denominator for both
# percentage series.
all_tweets_monthly = df["timestamp"].resample("M").count()

# Split denier tweets at prob_bot = 0.5. Both comparisons are strict, so rows
# whose prob_bot is NaN or exactly 0.5 land in neither group.
bot_mask = deniers_merged["prob_bot"] > 0.5
human_mask = deniers_merged["prob_bot"] < 0.5
bot_denier_tweets = deniers_merged.loc[bot_mask, "timestamp"].resample("M").count()
human_denier_tweets = deniers_merged.loc[human_mask, "timestamp"].resample("M").count()

# Share of each group among all climate tweets of the same month, in percent.
# The division aligns on the monthly index; months missing from either side
# come out as NaN.
bot_denier_tweet_pc = bot_denier_tweets.div(all_tweets_monthly).mul(100)
human_denier_tweet_pc = human_denier_tweets.div(all_tweets_monthly).mul(100)

# Draw both series on the same axes: bots in red, humans in blue.
ax = bot_denier_tweet_pc.plot(
    figsize=(15, 15),
    title="Climate denier twitter - percentage of all climate tweets - by month",
    color="r",
    label="Bots",
)
human_denier_tweet_pc.plot(ax=ax, color="b", label="Humans")
ax.legend(["Bots", "Humans"])
ax.set_ylabel("% of tweets per month")
ax.set_xlabel("year")
plt.show()
# Alternation of phrases to search for: "fake", "not real", "isn't real" /
# "isnt real", "doesn't exist" / "doesnt exist", "hoax", "propaganda",
# "conspiracy" (the apostrophes are optional in the pattern).
pattern = r"fake|not real|isn'?t real|doesn'?t exist|hoax|propaganda|conspiracy"

# Keep only denier tweets whose prob_bot is strictly above 0.5; rows with a
# NaN prob_bot never pass the comparison and are dropped.
likely_bot = deniers_merged["prob_bot"].gt(0.5)
bot_deniers = deniers_merged.loc[likely_bot]

# display_filtered_df is defined elsewhere in the notebook; presumably it shows
# the rows whose text matches `pattern` - confirm against its definition.
display_filtered_df(bot_deniers, pattern)
<timed exec>:6: FutureWarning: The default value of regex will change from True to False in a future version. WARNING:itables.downsample:showing 2500x2 of 5000x2 as nbytes=80000>65536=maxBytes. See https://mwouts.github.io/itables/#downsampling
| text_with_emoji_formatted | compound | |
|---|---|---|
| timestamp |